# delimit ;
set more off ;
capture log close ;
clear all ;

/* CODE FOR UPPER NEUSE CASE STUDY */
/* AUTHOR: ROGER H. VON HAEFEN  */
/* LAST EDITED: OCTOBER 1, 2021 */


/* DO FILE GENERATES WTP FOR ALTERNATIVE CLEANUP SCENARIOS IN THE UPPER NEUSE WATERSHED */

log using $case_study_log, replace text ;
log off ;


/* WQ PREDICTIONS FOR ALTERNATIVE SCENARIOS */
/* WQ PREDICTIONS FOR ALTERNATIVE SCENARIOS */
/* WQ PREDICTIONS FOR ALTERNATIVE SCENARIOS */

import excel $scenarios_data, first ;
order stream scenario share bi fc sc tn tp turb ;
gen strm_id = 1*inlist(stream,"Crabtree","Walnut") + 2*inlist(stream,"Middle","Swift") ;

foreach var in bi fc sc tn tp turb { ;
   egen double `var'1 = sum(share*`var'), by(scenario strm_id) ;
   drop `var' ;
   rename `var'1 `var' ;
} ;

label var bi "Biotic Index" ;
label var fc "Fecal Coliform" ;
label var sc "Specific Conductivity" ;
label var tn "Total Nitrogen" ;
label var tp "Total Phosphorus" ;
label var turb "Turbidity" ;
keep if inlist(stream,"Crabtree","Middle") ;
replace stream = "Crabtree & Walnut" if stream == "Crabtree" ;
replace stream = "Middle & Swift" if stream == "Middle" ;
drop strm_id share ;
tempfile scenarios ;
save `scenarios', replace ;




/* EXPERT ELICITATION */
/* EXPERT ELICITATION */
/* EXPERT ELICITATION */

/* ECOSYSTEM CONDITIONS & MURKY WATER DAYS */

use $ee_data, replace ;
drop if inlist(id,6,7,8) ;

gen ct = 3 ;
expand ct ;
drop ct ;
sort id question ;
by id question : gen cat = _n ;

gen double test = ec_good + ec_fair + ec_poor ;
assert test == 100 ;
replace ec_good = 100*ec_good/test ;
replace ec_fair = 100*ec_fair/test ;
replace ec_poor = 100*ec_poor/test ;
drop test ;

gen double test   = mw_low + mw_med + mw_high ;
sum test ;
replace mw_low    = 100*mw_low/test ;
replace mw_med    = 100*mw_med/test ;
replace mw_high   = 100*mw_high/test ;
drop test ;

gen ec = 0 ;
replace ec = round(ec_good) if cat == 1 ;
replace ec = round(ec_fair) if cat == 2 ;
replace ec = round(ec_poor) if cat == 3 ;

gen mw = 0 ;
replace mw = round(mw_low)    if cat == 1 ;
replace mw = round(mw_med)    if cat == 2 ;
replace mw = round(mw_high)   if cat == 3 ;

gen id1 = (id == 1) ; label var id1 "EXPERT #1" ;
gen id2 = (id == 2) ; label var id2 "EXPERT #2" ;
gen id3 = (id == 3) ; label var id3 "EXPERT #3" ;
gen id4 = (id == 4) ; label var id4 "EXPERT #4" ;
gen id5 = (id == 5) ; label var id5 "EXPERT #5" ;

order id question ec mw ;

/* ECOSYSTEM CONDITIONS */

preserve ; 
  drop if ec == 0 ;
  log on ;
  oprobit cat bi fc sc tn tp turb id1 id2 id3 id4 id5 [fw=ec], vce(cluster id) ;
  log off ;
  
  local bi_ec = _b[bi] ;
  local fc_ec = _b[fc] ;
  local sc_ec = _b[sc] ;
  local tn_ec = _b[tn] ;
  local tp_ec = _b[tp] ;
  local turb_ec = _b[turb] ;
  local id_ec = (_b[id1]+_b[id2]+_b[id3]+_b[id4]+_b[id5])/5 ;
  local cut1_ec = _b[/cut1] ;
  local cut2_ec = _b[/cut2] ;
  di `cut1_ec' ;
restore ;

/* MURKY WATER DAYS */

preserve ;
  drop if mw == 0 ;
  log on ;
  oprobit cat bi fc sc tn tp turb id1 id2 id3 id4 id5 [fw=mw], vce(cluster id) ;
  log off ;

  local bi_mw = _b[bi] ;
  local fc_mw = _b[fc] ;
  local sc_mw = _b[sc] ;
  local tn_mw = _b[tn] ;
  local tp_mw = _b[tp] ;
  local turb_mw = _b[turb] ;
  local id_mw = (_b[id1]+_b[id2]+_b[id3]+_b[id4]+_b[id5])/5 ;
  local cut1_mw = _b[/cut1] ;
  local cut2_mw = _b[/cut2] ;
  di `cut1_mw' ;
restore ;




/* HEALTH RISK */

use $ee_data, replace ;
sort question id ;
by question : assert bi == bi[1] ;
by question : assert fc == fc[1] ;
by question : assert sc == sc[1] ;
by question : assert tn == tn[1] ;
by question : assert tp == tp[1] ;
by question : assert turb == turb[1] ;

keep if inlist(id,6,7,8) ;

gen ct = 3 ;
expand ct ;
drop ct ;
sort id question ;
by id question : gen cat = _n ;

gen double test = hr_low + hr_medium + hr_high ;
assert test == 100 ;
drop test ;

gen hr = 0 ;
replace hr = round(hr_low)    if cat == 1 ;
replace hr = round(hr_medium) if cat == 2 ;
replace hr = round(hr_high)   if cat == 3 ;

order id question hr ;

gen id6 = (id == 6) ; label var id6 "EXPERT #6" ;
gen id7 = (id == 7) ; label var id7 "EXPERT #7" ;
gen id8 = (id == 8) ; label var id8 "EXPERT #8" ;

drop if hr == 0 ;

preserve ;
  log on ;
  oprobit cat bi  fc  sc  tn  tp  turb  id6 id7 id8 [fw=hr], vce(cluster id) ;  
  log off ;

  local bi_hr = _b[bi] ;
  local fc_hr = _b[fc] ;
  local sc_hr = _b[sc] ;
  local tn_hr = _b[tn] ;
  local tp_hr = _b[tp] ;
  local turb_hr = _b[turb] ;

  local id_hr = (_b[id6]+_b[id7]+_b[id8])/3 ;
  local cut1_hr = _b[/cut1] ;
  local cut2_hr = _b[/cut2] ;
  di `cut1_hr' ;
restore ;
sum bi fc sc tn tp turb ;


use `scenarios', replace ;
sum bi fc sc tn tp turb ;

gen double index_ec = `bi_ec'*bi + `fc_ec'*fc + `sc_ec'*sc + `tn_ec'*tn + `tp_ec'*tp + `turb_ec'*turb + `id_ec' ;
gen double index_mw = `bi_mw'*bi + `fc_mw'*fc + `sc_mw'*sc + `tn_mw'*tn + `tp_mw'*tp + `turb_mw'*turb + `id_mw' ;
gen double index_hr = `bi_hr'*bi + `fc_hr'*fc + `sc_hr'*sc + `tn_hr'*tn + `tp_hr'*tp + `turb_hr'*turb + `id_hr' ;

gen double ec_best  = 100*(normal(`cut1_ec'-index_ec)) ;
gen double ec_mid   = 100*(normal(`cut2_ec'-index_ec) - normal(`cut1_ec'-index_ec)) ;
gen double ec_worst = 100*(1-normal(`cut2_ec'-index_ec)) ;

gen double mw_best  = 100*(normal(`cut1_mw'-index_mw)) ;
gen double mw_mid   = 100*(normal(`cut2_mw'-index_mw) - normal(`cut1_mw'-index_mw)) ;
gen double mw_worst = 100*(1-normal(`cut2_mw'-index_mw)) ;

gen double hr_best  = 100*(normal(`cut1_hr'-index_hr)) ;
gen double hr_mid   = 100*(normal(`cut2_hr'-index_hr) - normal(`cut1_hr'-index_hr)) ;
gen double hr_worst = 100*(1-normal(`cut2_hr'-index_hr)) ;

sort stream scenario ;

/* GENERATING CHANGES IN WQ RELATIVE TO BASELINE */

replace ec_best  = ec_best[2]  - ec_best[1]  if _n == 2 ;
replace ec_worst = ec_worst[2] - ec_worst[1] if _n == 2 ;

replace mw_best  = mw_best[2]  - mw_best[1]  if _n == 2 ;
replace mw_worst = mw_worst[2] - mw_worst[1] if _n == 2 ;

replace hr_best  = hr_best[2]  - hr_best[1]  if _n == 2 ;
replace hr_worst = hr_worst[2] - hr_worst[1] if _n == 2 ;

replace ec_best  = ec_best[3]  - ec_best[1]  if _n == 3 ;
replace ec_worst = ec_worst[3] - ec_worst[1] if _n == 3 ;

replace mw_best  = mw_best[3]  - mw_best[1]  if _n == 3 ;
replace mw_worst = mw_worst[3] - mw_worst[1] if _n == 3 ;

replace hr_best  = hr_best[3]  - hr_best[1]  if _n == 3 ;
replace hr_worst = hr_worst[3] - hr_worst[1] if _n == 3 ;

replace ec_best  = ec_best[5]  - ec_best[4]  if _n == 5 ;
replace ec_worst = ec_worst[5] - ec_worst[4] if _n == 5 ;

replace mw_best  = mw_best[5]  - mw_best[4]  if _n == 5 ;
replace mw_worst = mw_worst[5] - mw_worst[4] if _n == 5 ;

replace hr_best  = hr_best[5]  - hr_best[4]  if _n == 5 ;
replace hr_worst = hr_worst[5] - hr_worst[4] if _n == 5 ;

replace ec_best  = ec_best[6]  - ec_best[4]  if _n == 6 ;
replace ec_worst = ec_worst[6] - ec_worst[4] if _n == 6 ;

replace mw_best  = mw_best[6]  - mw_best[4]  if _n == 6 ;
replace mw_worst = mw_worst[6] - mw_worst[4] if _n == 6 ;

replace hr_best  = hr_best[6]  - hr_best[4]  if _n == 6 ;
replace hr_worst = hr_worst[6] - hr_worst[4] if _n == 6 ;

drop if _n == 1 | _n == 4 ;

keep stream scenario ec_best ec_worst mw_best mw_worst hr_best hr_worst ;
sort stream scenario ;
save `scenarios', replace ;


/* SP SURVEY DATA ESTIMATION */
/* SP SURVEY DATA ESTIMATION */
/* SP SURVEY DATA ESTIMATION */

use $estimation_data, replace ;

/* DROPPING:										*/
/*    1) SPEEDING PEOPLE (<=8 MINUTES TO COMPLETE SURVEY)				*/
/*    2) PEOPLE WHO TAKE MORE THAN A WEEK TO COMPLETE THE SURVEY			*/
/*    3) PEOPLE WHO FAIL THE TRAP QUESTION						*/
/*    4) PEOPLE WHO DISAGREE OR STRONGLY DISAGREE WITH THE CONSEQUENTIALITY STATEMENTS  */

keep if survey_duration > 8 & survey_duration <= (7*24*60) & inlist(debrief_3,"D") & !inlist(debrief_1,"D","SD") & !inlist(debrief_2,"D","SD") ;
gen cost_dis = cost/(1+near_dist) ;

log on ;
logit ce cost          ec_g ec_p hr_g hr_p md_g md_p age gender white college [pw=wgt_final], cluster(resp_id) ;
nlcom wtp : -12*(_b[ec_g]-_b[ec_p]+_b[hr_g]-_b[hr_p]+_b[md_g]-_b[md_p])/_b[cost] ;

logit ce cost cost_dis ec_g ec_p hr_g hr_p md_g md_p age gender white college [pw=wgt_final], cluster(resp_id) ;
nlcom wtp : -12*(_b[ec_g]-_b[ec_p]+_b[hr_g]-_b[hr_p]+_b[md_g]-_b[md_p])/(_b[cost]+_b[cost_dis]/(1+ 9.584872)) ;  /* WTP FOR PERSON LIVING AVERAGE DISTANCE FROM CENTROID CLEANED-UP STREAMS */
nlcom wtp : -12*(_b[ec_g]-_b[ec_p]+_b[hr_g]-_b[hr_p]+_b[md_g]-_b[md_p])/(_b[cost]+_b[cost_dis]/(1+ 1)) ;         /* WTP FOR PERSON LIVING ONE MILE         FROM CENTROID CLEANED-UP STREAMS */
log off ;

local cost = _b[cost] ;
local cost_dis = _b[cost_dis] ;

gen double mui = -(`cost' + `cost_dis'/(1+near_dist)) ;
local ec_g = _b[ec_g] ;
local ec_p = _b[ec_p] ;
local hr_g = _b[hr_g] ;
local hr_p = _b[hr_p] ;
local md_g = _b[md_g] ;
local md_p = _b[md_p] ;
gen double cons = _b[age]*age + _b[gender]*gender + _b[white]*white + _b[college]*college ;


/* UPPER NEUSE CASE STUDY */
/* UPPER NEUSE CASE STUDY */
/* UPPER NEUSE CASE STUDY */

tab county ;
keep if county == "W" ;
sort resp_id ; by resp_id : keep if _n == 1 ;
keep resp_id wgt_final mui cons near_dist ;
di _N ;
cross using `scenarios' ;
di _N ;

gen miles = 0 ;

/* ACCORDING TO THE WQ MODELING EXERCISE, 		*/	
/*	CRABTREE CREEK IS 226.634 KMS OF STREAMS 	*/
/*	WALNUT   CREEK IS  68.459 KMS OF STREAMS 	*/
/*	MIDDLE   CREEK IS 110.308 KMS OF STREAMS 	*/
/*	SWIFT    CREEK IS 121.143 KMS OF STREAMS 	*/

/* NOTE: THE SP SURVEY TELLS PEOPLE THAT CRABTREE AND WALNUT CREEKS ARE ROUGHLY 100 MILES IN LENGTH */
/* THE WQ MODELING EXERCISE USES A MORE EXPANSIVE MEASURE OF STREAM MILES (INCLUDING SUBSTREAMS)    */

replace miles = (226.634+68.459)/1.60934 if stream == "Crabtree & Walnut" ;  
replace miles = (110.308+121.143)/1.60934 if stream == "Middle & Swift" ;

gen double wtp = 12*(`ec_g'*ec_best +`ec_p'*ec_worst + `hr_g'*hr_best +`hr_p'*hr_worst + `md_g'*mw_best +`md_p'*mw_worst)/(mui) ;  /* IGNORING CONSTANT TERM */
replace wtp = wtp*(110.308+121.143)/(226.634+68.459) if stream == "Middle & Swift" ;                                               /* ADJUSTING WTP BECAUSE PROPORTIONALLY FEWER STREAMS ARE AFFECTED */
rename near_dis dis ;

log on ;
sum wtp [w=wgt_final] if            dis < 1  & stream == "Crabtree & Walnut" & scenario == 1 ;
sum wtp [w=wgt_final] if dis >= 1 & dis < 2  & stream == "Crabtree & Walnut" & scenario == 1 ;
sum wtp [w=wgt_final] if dis >= 2 & dis < 5  & stream == "Crabtree & Walnut" & scenario == 1 ;
sum wtp [w=wgt_final] if dis >= 5 & dis < 10 & stream == "Crabtree & Walnut" & scenario == 1 ;
sum wtp [w=wgt_final] if dis >=10 & dis < 30 & stream == "Crabtree & Walnut" & scenario == 1 ;
sum dis, detail ;

sort stream scenario ;
di "HOUSEHOLD-LEVEL WAKE COUNTY BENEFITS" ;
collapse (mean) wtp [w=wgt_final], by(stream scenario) ;
list ;
replace wtp = wtp*400172 ;   /* WAKE COUNTY HOUSEHOLDS IS 400,172 - https://www.census.gov/quickfacts/fact/table/wakecountynorthcarolina/PST045219 */
di "AGGREGATE WAKE COUNTY BENEFITS" ;
list ;
log close ;